{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import cv2 as cv\n",
    "import numpy as np\n",
    "import pycuda.gpuarray as gpuarray\n",
    "import pycuda.driver as cuda\n",
    "import pycuda.autoinit"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build a random 4x4 float32 matrix on the host, then upload it to the GPU.\n",
    "b_host = np.random.randn(4, 4).astype(np.float32)\n",
    "b_gpu = gpuarray.to_gpu(b_host)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Scale on the device, then fetch the result back as a host array.\n",
    "b_doubled_gpu = 2 * b_gpu\n",
    "b_doubled = b_doubled_gpu.get()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[-0.7436429  -1.8159572   0.28205386  1.7937574 ]\n",
      " [-0.4242038   0.8123264   0.8644576   0.00816789]\n",
      " [-0.44237193 -0.27193326 -1.2570636   0.20925684]\n",
      " [ 0.15309955  0.4520917  -0.55670154  1.7599062 ]]\n",
      "[[-1.4872859  -3.6319144   0.5641077   3.5875149 ]\n",
      " [-0.8484076   1.6246527   1.7289152   0.01633577]\n",
      " [-0.88474387 -0.5438665  -2.5141273   0.4185137 ]\n",
      " [ 0.3061991   0.9041834  -1.1134031   3.5198123 ]]\n"
     ]
    }
   ],
   "source": [
    "# Show the device array first, then its doubled copy fetched to the host.\n",
    "print(b_gpu)\n",
    "print(b_doubled)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(256, 256, 3)\n"
     ]
    }
   ],
   "source": [
    "# Absolute path to the sample image; adjust it for the machine running this.\n",
    "PATH = \"/home/nvidia-mm/Workspace/frame_diff_py/lena.jpg\"\n",
    "I = cv.imread(PATH, cv.IMREAD_COLOR)\n",
    "# cv.imread reports failure by returning None instead of raising, so check\n",
    "# here rather than failing later with an opaque AttributeError.\n",
    "if I is None:\n",
    "    raise IOError('could not read image: ' + PATH)\n",
    "print(I.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# cv.imshow('image',I)\n",
    "# if 27 == cv.waitKey(0):\n",
    "#     cv.destroyAllWindows()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "120.0 - 120.0\n"
     ]
    }
   ],
   "source": [
    "# Upload two float32 copies of the image and evaluate 2*a - b on the GPU.\n",
    "I_float = I.astype(np.float32)\n",
    "I_gpu_a = gpuarray.to_gpu(I_float)\n",
    "I_gpu_b = gpuarray.to_gpu(I_float)\n",
    "I_div = (I_gpu_a * 2 - I_gpu_b).get()\n",
    "# Both uploads hold the same pixels, so 2*a - b should equal a.\n",
    "print('%s - %s' % (I_gpu_a[0][0][0], I_div[0][0][0]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pycuda.driver as cuda\n",
    "import pycuda.autoinit\n",
    "from pycuda.compiler import SourceModule\n",
    "import pycuda.gpuarray as gpuarray\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Host-side 4x4 float32 test matrix.\n",
    "a = np.random.randn(4, 4).astype(np.float32)\n",
    "\n",
    "# Reserve a raw device buffer of the same byte size and upload the matrix.\n",
    "a_gpu = cuda.mem_alloc(a.nbytes)\n",
    "cuda.memcpy_htod(a_gpu, a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compile the CUDA kernels used by the cells below.\n",
    "mod = SourceModule(\"\"\"\n",
    "        // Doubles every element of a 4x4 float matrix in place.\n",
    "        // The indexing assumes a single block whose x and y extents are 4.\n",
    "        __global__ void doublify(float *a)\n",
    "        {\n",
    "          int idx = threadIdx.x + threadIdx.y*4;\n",
    "          a[idx] *= 2;\n",
    "        }\n",
    "\n",
    "        // Frame-difference step on two size x size float images stored as\n",
    "        // flat row-major buffers. For every pixel, B is overwritten with the\n",
    "        // incoming value of A, and A is zeroed where it differs from the\n",
    "        // previous value of B by more than 25.\n",
    "        __global__ void ImgSub(float *A, float *B, int size)\n",
    "        {\n",
    "          int col = blockIdx.x*blockDim.x + threadIdx.x;\n",
    "          int row = blockIdx.y*blockDim.y + threadIdx.y;\n",
    "          if(col < size && row < size){\n",
    "            // The host passes plain linear allocations, so index them as\n",
    "            // 1-D arrays; a float** parameter would make the kernel\n",
    "            // interpret pixel data as device pointers.\n",
    "            int i = row*size + col;\n",
    "            float cur = A[i];\n",
    "            float prev = B[i];\n",
    "            B[i] = cur;\n",
    "            if(fabsf(cur - prev) > 25.0f){\n",
    "              A[i] = 0;\n",
    "            }\n",
    "          }\n",
    "        }\n",
    "        \"\"\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[-0.26412612 -2.599057   -1.8459568  -1.5173056 ]\n",
      " [-0.22525242 -1.240282   -0.01318387  1.2576464 ]\n",
      " [-0.43277848 -0.35822362 -2.4905665   0.18955706]\n",
      " [ 0.64920217 -0.37311763 -2.157971    4.828526  ]]\n",
      "[[-0.13206306 -1.2995285  -0.9229784  -0.7586528 ]\n",
      " [-0.11262621 -0.620141   -0.00659193  0.6288232 ]\n",
      " [-0.21638924 -0.17911181 -1.2452832   0.09477853]\n",
      " [ 0.32460108 -0.18655881 -1.0789855   2.414263  ]]\n"
     ]
    }
   ],
   "source": [
    "# Host buffer that will receive the kernel's result.\n",
    "a_doubled = np.empty_like(a)\n",
    "\n",
    "# Launch one 4x4 block so there is a single thread per matrix element.\n",
    "func = mod.get_function('doublify')\n",
    "func(a_gpu, block=(4, 4, 1))\n",
    "cuda.memcpy_dtoh(a_doubled, a_gpu)\n",
    "\n",
    "print(a_doubled)\n",
    "print(a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "img = cv.imread(PATH, cv.IMREAD_GRAYSCALE)\n",
    "# cv.imread reports failure by returning None instead of raising.\n",
    "if img is None:\n",
    "    raise IOError('could not read image: ' + PATH)\n",
    "img = img.astype(np.float32)\n",
    "\n",
    "# ImgSub receives a single size argument, so it can only handle square frames.\n",
    "if img.shape[0] != img.shape[1]:\n",
    "    raise ValueError('ImgSub expects a square image, got %dx%d' % img.shape)\n",
    "\n",
    "# Host buffer for the kernel result, same shape and dtype as the input.\n",
    "img_res = np.zeros(img.shape, dtype=np.float32)\n",
    "# Take the dimensions from the image so the kernel's bounds check always\n",
    "# matches the size of the uploaded buffers.\n",
    "h = np.int32(img.shape[0])\n",
    "w = np.int32(img.shape[1])\n",
    "\n",
    "# Two device buffers, both initialised with the same frame.\n",
    "img_a_gpu = cuda.mem_alloc(img.nbytes)\n",
    "img_b_gpu = cuda.mem_alloc(img.nbytes)\n",
    "cuda.memcpy_htod(img_a_gpu, img)\n",
    "cuda.memcpy_htod(img_b_gpu, img)\n",
    "func = mod.get_function('ImgSub')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pycuda._driver.DeviceAllocation'>\n"
     ]
    }
   ],
   "source": [
    "# The kernel only indexes x and y, so launch 2-D blocks; a z extent above 1\n",
    "# would make several threads race on the same pixel.\n",
    "block = (16, 16, 1)\n",
    "# Round up so the grid covers the whole image; the kernel bounds-checks.\n",
    "grid = ((int(w) + block[0] - 1) // block[0],\n",
    "        (int(h) + block[1] - 1) // block[1])\n",
    "func(img_a_gpu, img_b_gpu, w, block=block, grid=grid)\n",
    "print(type(img_a_gpu))\n",
    "# Copy the processed frame back to the host; without this step img_res\n",
    "# keeps its initial zeros and the window shows a black image.\n",
    "cuda.memcpy_dtoh(img_res, img_a_gpu)\n",
    "# Convert into a separate array so img_res stays float32 and this cell can\n",
    "# be re-run without shrinking the destination buffer.\n",
    "img_u8 = img_res.astype(np.uint8)\n",
    "cv.imshow('img_moving', img_u8)\n",
    "if 27 == cv.waitKey(0):\n",
    "    cv.destroyAllWindows()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.17"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
